Scanorama (Hie et al., 2019)
GitHub
Tutorial external API
External external API tutorial
A fix to run scran pooling normalization (`computeSumFactors`) in the current Python environment.
import scanpy as sc
import scanorama
import numpy as np
import pandas as pd
import os
# Working directory
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

# rpy2: R_HOME is set before rpy2 is imported below
# (presumably so that rpy2 uses the R of this conda environment - confirm).
os.environ['R_HOME'] = '/home/fdeckert/bin/miniconda3/envs/p.3.8.12-FD20200109SPLENO/lib/R'

# Plotting
import rpy2.robjects as robjects

# Source the R plotting script; element 0 of the result is a named R list
# whose entries are themselves named. It is converted to a nested dict
# {entry name: {key: first element of that key's value}}.
color_load = robjects.r.source('plotting_global.R')
color = {
    name: {
        key: color_load[0][idx].rx2(key)[0]
        for key in color_load[0][idx].names
    }
    for idx, name in enumerate(color_load[0].names)
}

sc.set_figure_params(figsize=(5, 5))

adata = sc.read_h5ad('data/object/so_sct.h5ad')
def set_color(categories):
    """Order categorical ``adata.obs`` columns and register their colors.

    Works on the module-level ``adata`` and ``color`` objects. For every name
    in ``categories`` that is a column of ``adata.obs``, the column is cast to
    a categorical, its categories are ordered like the keys of
    ``color[name]`` (restricted to values present in the column), and the
    matching colors are stored in ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of column names; names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: If a column contains values that have no entry in
            ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        palette = color[category]
        # reorder_categories() needs exactly the existing categories, so
        # categories without any cell are dropped first.
        values = adata.obs[category].astype('category').cat.remove_unused_categories()
        # Build the membership set once instead of listing every cell per key.
        observed = set(values.cat.categories)
        keys = [key for key in palette if key in observed]
        uncolored = observed.difference(keys)
        if uncolored:
            raise ValueError(
                f"No color defined for {category!r} values: "
                f"{sorted(map(str, uncolored))}"
            )
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)


# Set colors
set_color(list(color.keys()))
# Split the cells into one AnnData copy per treatment group.
adata_sub = [
    adata[adata.obs['treatment'] == sample_group].copy()
    for sample_group in adata.obs['treatment'].unique()
]

# Run Scanorama
# The embedding is read back from each subset's .obsm['X_scanorama'] below.
scanorama.integrate_scanpy(adata_sub, dimred=200, knn=50, verbose=True)

# Concatenate scanorama output
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata
# Rows are concatenated group by group, so make sure they line up with
# adata.obs_names (reordering if necessary) before storing the embedding.
if len(obs_names) != adata.n_obs:
    raise ValueError('Scanorama output does not cover every cell of adata')
if not np.array_equal(obs_names, adata.obs_names):
    row_order = pd.Index(obs_names).get_indexer(adata.obs_names)
    if (row_order < 0).any():
        raise ValueError('Scanorama output is missing cells of adata')
    X_scanorama = X_scanorama[row_order]
adata.obsm["X_scanorama"] = X_scanorama
Found 8000 genes among all datasets Processing datasets (0, 1)
# Dimensional reduction and clustering
# The neighbour graph is built on the Scanorama embedding stored in .obsm.
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)
# Reload the object and keep only the genes listed in uns['hvg_int_8000'].
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_8000 = list(adata.uns['hvg_int_8000'])
# Copy explicitly: subsetting returns a view, and writing to `.obs` of a view
# later on makes anndata emit an ImplicitModificationWarning.
adata = adata[:, hvg_8000].copy()
def set_color(categories):
    """Order categorical ``adata.obs`` columns and register their colors.

    Works on the module-level ``adata`` and ``color`` objects. For every name
    in ``categories`` that is a column of ``adata.obs``, the column is cast to
    a categorical, its categories are ordered like the keys of
    ``color[name]`` (restricted to values present in the column), and the
    matching colors are stored in ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of column names; names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: If a column contains values that have no entry in
            ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        palette = color[category]
        # reorder_categories() needs exactly the existing categories, so
        # categories without any cell are dropped first.
        values = adata.obs[category].astype('category').cat.remove_unused_categories()
        # Build the membership set once instead of listing every cell per key.
        observed = set(values.cat.categories)
        keys = [key for key in palette if key in observed]
        uncolored = observed.difference(keys)
        if uncolored:
            raise ValueError(
                f"No color defined for {category!r} values: "
                f"{sorted(map(str, uncolored))}"
            )
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)


# Set colors
set_color(list(color.keys()))
/tmp/ipykernel_3028845/1707382715.py:7: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
# Split the cells into one AnnData copy per treatment group.
adata_sub = [
    adata[adata.obs['treatment'] == sample_group].copy()
    for sample_group in adata.obs['treatment'].unique()
]

# Run Scanorama
# The embedding is read back from each subset's .obsm['X_scanorama'] below.
scanorama.integrate_scanpy(adata_sub, dimred=200, knn=50, verbose=True)

# Concatenate scanorama output
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata
# Rows are concatenated group by group, so make sure they line up with
# adata.obs_names (reordering if necessary) before storing the embedding.
if len(obs_names) != adata.n_obs:
    raise ValueError('Scanorama output does not cover every cell of adata')
if not np.array_equal(obs_names, adata.obs_names):
    row_order = pd.Index(obs_names).get_indexer(adata.obs_names)
    if (row_order < 0).any():
        raise ValueError('Scanorama output is missing cells of adata')
    X_scanorama = X_scanorama[row_order]
adata.obsm["X_scanorama"] = X_scanorama
Found 8000 genes among all datasets Processing datasets (0, 1)
# Dimensional reduction and clustering
# The neighbour graph is built on the Scanorama embedding stored in .obsm.
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)
# Reload the object and keep only the genes listed in uns['hvg_int_4000'].
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_4000 = list(adata.uns['hvg_int_4000'])
# Copy explicitly: subsetting returns a view, and writing to `.obs` of a view
# later on makes anndata emit an ImplicitModificationWarning.
adata = adata[:, hvg_4000].copy()
def set_color(categories):
    """Order categorical ``adata.obs`` columns and register their colors.

    Works on the module-level ``adata`` and ``color`` objects. For every name
    in ``categories`` that is a column of ``adata.obs``, the column is cast to
    a categorical, its categories are ordered like the keys of
    ``color[name]`` (restricted to values present in the column), and the
    matching colors are stored in ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of column names; names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: If a column contains values that have no entry in
            ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        palette = color[category]
        # reorder_categories() needs exactly the existing categories, so
        # categories without any cell are dropped first.
        values = adata.obs[category].astype('category').cat.remove_unused_categories()
        # Build the membership set once instead of listing every cell per key.
        observed = set(values.cat.categories)
        keys = [key for key in palette if key in observed]
        uncolored = observed.difference(keys)
        if uncolored:
            raise ValueError(
                f"No color defined for {category!r} values: "
                f"{sorted(map(str, uncolored))}"
            )
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)


# Set colors
set_color(list(color.keys()))
/tmp/ipykernel_3028845/1707382715.py:7: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
# Split the cells into one AnnData copy per treatment group.
adata_sub = [
    adata[adata.obs['treatment'] == sample_group].copy()
    for sample_group in adata.obs['treatment'].unique()
]

# Run Scanorama
# The embedding is read back from each subset's .obsm['X_scanorama'] below.
scanorama.integrate_scanpy(adata_sub, dimred=200, knn=50, verbose=True)

# Concatenate scanorama output
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata
# Rows are concatenated group by group, so make sure they line up with
# adata.obs_names (reordering if necessary) before storing the embedding.
if len(obs_names) != adata.n_obs:
    raise ValueError('Scanorama output does not cover every cell of adata')
if not np.array_equal(obs_names, adata.obs_names):
    row_order = pd.Index(obs_names).get_indexer(adata.obs_names)
    if (row_order < 0).any():
        raise ValueError('Scanorama output is missing cells of adata')
    X_scanorama = X_scanorama[row_order]
adata.obsm["X_scanorama"] = X_scanorama
Found 4000 genes among all datasets Processing datasets (0, 1)
# Dimensional reduction and clustering
# The neighbour graph is built on the Scanorama embedding stored in .obsm.
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)
# Reload the object and keep only the genes listed in uns['hvg_int_2000'].
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_2000 = list(adata.uns['hvg_int_2000'])
# Copy explicitly: subsetting returns a view, and writing to `.obs` of a view
# later on makes anndata emit an ImplicitModificationWarning.
adata = adata[:, hvg_2000].copy()
def set_color(categories):
    """Order categorical ``adata.obs`` columns and register their colors.

    Works on the module-level ``adata`` and ``color`` objects. For every name
    in ``categories`` that is a column of ``adata.obs``, the column is cast to
    a categorical, its categories are ordered like the keys of
    ``color[name]`` (restricted to values present in the column), and the
    matching colors are stored in ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of column names; names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: If a column contains values that have no entry in
            ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        palette = color[category]
        # reorder_categories() needs exactly the existing categories, so
        # categories without any cell are dropped first.
        values = adata.obs[category].astype('category').cat.remove_unused_categories()
        # Build the membership set once instead of listing every cell per key.
        observed = set(values.cat.categories)
        keys = [key for key in palette if key in observed]
        uncolored = observed.difference(keys)
        if uncolored:
            raise ValueError(
                f"No color defined for {category!r} values: "
                f"{sorted(map(str, uncolored))}"
            )
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)


# Set colors
set_color(list(color.keys()))
/tmp/ipykernel_3028845/1707382715.py:7: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
# Split the cells into one AnnData copy per treatment group.
adata_sub = [
    adata[adata.obs['treatment'] == sample_group].copy()
    for sample_group in adata.obs['treatment'].unique()
]

# Run Scanorama
# The embedding is read back from each subset's .obsm['X_scanorama'] below.
scanorama.integrate_scanpy(adata_sub, dimred=200, knn=50, verbose=True)

# Concatenate scanorama output
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata
# Rows are concatenated group by group, so make sure they line up with
# adata.obs_names (reordering if necessary) before storing the embedding.
if len(obs_names) != adata.n_obs:
    raise ValueError('Scanorama output does not cover every cell of adata')
if not np.array_equal(obs_names, adata.obs_names):
    row_order = pd.Index(obs_names).get_indexer(adata.obs_names)
    if (row_order < 0).any():
        raise ValueError('Scanorama output is missing cells of adata')
    X_scanorama = X_scanorama[row_order]
adata.obsm["X_scanorama"] = X_scanorama
Found 2000 genes among all datasets Processing datasets (0, 1)
# Dimensional reduction and clustering
# The neighbour graph is built on the Scanorama embedding stored in .obsm.
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)
# Save the integrated object as h5ad.
adata.write_h5ad('data/object/adata_sct_hvg2000.h5ad')

# dirFacility is imported from ../scFacility/script, which is therefore put
# first on the module search path.
import sys
sys.path.insert(0, '../scFacility/script')
from dirFacility import adata2dir

# Export the same object as a directory tree.
adata2dir(
    adata,
    'data/object/adata_sct_hvg2000/',
    assay="RNA",
    layers="counts",
    build_dir=True,
    overwrite=True,
)
Creating output directorydata/object/adata_sct_hvg2000/ Writing meta data: data/object/adata_sct_hvg2000/meta/meta.csv Writing assay data for the layers: Writing data for the reduction: ... X_scanorama ... X_umap
# Load the second input object (all genes, no HVG subsetting).
adata = sc.read_h5ad('data/object/so_sct_reg.h5ad')


def set_color(categories):
    """Order categorical ``adata.obs`` columns and register their colors.

    Works on the module-level ``adata`` and ``color`` objects. For every name
    in ``categories`` that is a column of ``adata.obs``, the column is cast to
    a categorical, its categories are ordered like the keys of
    ``color[name]`` (restricted to values present in the column), and the
    matching colors are stored in ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of column names; names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: If a column contains values that have no entry in
            ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        palette = color[category]
        # reorder_categories() needs exactly the existing categories, so
        # categories without any cell are dropped first.
        values = adata.obs[category].astype('category').cat.remove_unused_categories()
        # Build the membership set once instead of listing every cell per key.
        observed = set(values.cat.categories)
        keys = [key for key in palette if key in observed]
        uncolored = observed.difference(keys)
        if uncolored:
            raise ValueError(
                f"No color defined for {category!r} values: "
                f"{sorted(map(str, uncolored))}"
            )
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)


# Set colors
set_color(list(color.keys()))
# Split the cells into one AnnData copy per treatment group.
adata_sub = [
    adata[adata.obs['treatment'] == sample_group].copy()
    for sample_group in adata.obs['treatment'].unique()
]

# Run Scanorama
# The embedding is read back from each subset's .obsm['X_scanorama'] below.
scanorama.integrate_scanpy(adata_sub, dimred=200, knn=50, verbose=True)

# Concatenate scanorama output
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata
# Rows are concatenated group by group, so make sure they line up with
# adata.obs_names (reordering if necessary) before storing the embedding.
if len(obs_names) != adata.n_obs:
    raise ValueError('Scanorama output does not cover every cell of adata')
if not np.array_equal(obs_names, adata.obs_names):
    row_order = pd.Index(obs_names).get_indexer(adata.obs_names)
    if (row_order < 0).any():
        raise ValueError('Scanorama output is missing cells of adata')
    X_scanorama = X_scanorama[row_order]
adata.obsm["X_scanorama"] = X_scanorama
Found 8000 genes among all datasets Processing datasets (0, 1)
# Dimensional reduction and clustering
# The neighbour graph is built on the Scanorama embedding stored in .obsm.
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)
# Reload the object and keep only the genes listed in uns['hvg_int_8000'].
adata = sc.read_h5ad('data/object/so_sct_reg.h5ad')
hvg_8000 = list(adata.uns['hvg_int_8000'])
# Copy explicitly: subsetting returns a view, and writing to `.obs` of a view
# later on makes anndata emit an ImplicitModificationWarning.
adata = adata[:, hvg_8000].copy()
def set_color(categories):
    """Order categorical ``adata.obs`` columns and register their colors.

    Works on the module-level ``adata`` and ``color`` objects. For every name
    in ``categories`` that is a column of ``adata.obs``, the column is cast to
    a categorical, its categories are ordered like the keys of
    ``color[name]`` (restricted to values present in the column), and the
    matching colors are stored in ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of column names; names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: If a column contains values that have no entry in
            ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        palette = color[category]
        # reorder_categories() needs exactly the existing categories, so
        # categories without any cell are dropped first.
        values = adata.obs[category].astype('category').cat.remove_unused_categories()
        # Build the membership set once instead of listing every cell per key.
        observed = set(values.cat.categories)
        keys = [key for key in palette if key in observed]
        uncolored = observed.difference(keys)
        if uncolored:
            raise ValueError(
                f"No color defined for {category!r} values: "
                f"{sorted(map(str, uncolored))}"
            )
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)


# Set colors
set_color(list(color.keys()))
/tmp/ipykernel_3028845/1707382715.py:7: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
# Split the cells into one AnnData copy per treatment group.
adata_sub = [
    adata[adata.obs['treatment'] == sample_group].copy()
    for sample_group in adata.obs['treatment'].unique()
]

# Run Scanorama
# The embedding is read back from each subset's .obsm['X_scanorama'] below.
scanorama.integrate_scanpy(adata_sub, dimred=200, knn=50, verbose=True)

# Concatenate scanorama output
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata
# Rows are concatenated group by group, so make sure they line up with
# adata.obs_names (reordering if necessary) before storing the embedding.
if len(obs_names) != adata.n_obs:
    raise ValueError('Scanorama output does not cover every cell of adata')
if not np.array_equal(obs_names, adata.obs_names):
    row_order = pd.Index(obs_names).get_indexer(adata.obs_names)
    if (row_order < 0).any():
        raise ValueError('Scanorama output is missing cells of adata')
    X_scanorama = X_scanorama[row_order]
adata.obsm["X_scanorama"] = X_scanorama
Found 8000 genes among all datasets Processing datasets (0, 1)
# Dimensional reduction and clustering
# The neighbour graph is built on the Scanorama embedding stored in .obsm.
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)
# Reload the object and keep only the genes listed in uns['hvg_int_4000'].
adata = sc.read_h5ad('data/object/so_sct_reg.h5ad')
hvg_4000 = list(adata.uns['hvg_int_4000'])
# Copy explicitly: subsetting returns a view, and writing to `.obs` of a view
# later on makes anndata emit an ImplicitModificationWarning.
adata = adata[:, hvg_4000].copy()
def set_color(categories):
    """Order categorical ``adata.obs`` columns and register their colors.

    Works on the module-level ``adata`` and ``color`` objects. For every name
    in ``categories`` that is a column of ``adata.obs``, the column is cast to
    a categorical, its categories are ordered like the keys of
    ``color[name]`` (restricted to values present in the column), and the
    matching colors are stored in ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of column names; names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: If a column contains values that have no entry in
            ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        palette = color[category]
        # reorder_categories() needs exactly the existing categories, so
        # categories without any cell are dropped first.
        values = adata.obs[category].astype('category').cat.remove_unused_categories()
        # Build the membership set once instead of listing every cell per key.
        observed = set(values.cat.categories)
        keys = [key for key in palette if key in observed]
        uncolored = observed.difference(keys)
        if uncolored:
            raise ValueError(
                f"No color defined for {category!r} values: "
                f"{sorted(map(str, uncolored))}"
            )
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)


# Set colors
set_color(list(color.keys()))
/tmp/ipykernel_3028845/1707382715.py:7: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
# Split the cells into one AnnData copy per treatment group.
adata_sub = [
    adata[adata.obs['treatment'] == sample_group].copy()
    for sample_group in adata.obs['treatment'].unique()
]

# Run Scanorama
# The embedding is read back from each subset's .obsm['X_scanorama'] below.
scanorama.integrate_scanpy(adata_sub, dimred=200, knn=50, verbose=True)

# Concatenate scanorama output
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata
# Rows are concatenated group by group, so make sure they line up with
# adata.obs_names (reordering if necessary) before storing the embedding.
if len(obs_names) != adata.n_obs:
    raise ValueError('Scanorama output does not cover every cell of adata')
if not np.array_equal(obs_names, adata.obs_names):
    row_order = pd.Index(obs_names).get_indexer(adata.obs_names)
    if (row_order < 0).any():
        raise ValueError('Scanorama output is missing cells of adata')
    X_scanorama = X_scanorama[row_order]
adata.obsm["X_scanorama"] = X_scanorama
Found 4000 genes among all datasets Processing datasets (0, 1)
# Dimensional reduction and clustering
# The neighbour graph is built on the Scanorama embedding stored in .obsm.
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)
# Reload the object and keep only the genes listed in uns['hvg_int_2000'].
adata = sc.read_h5ad('data/object/so_sct_reg.h5ad')
hvg_2000 = list(adata.uns['hvg_int_2000'])
# Copy explicitly: subsetting returns a view, and writing to `.obs` of a view
# later on makes anndata emit an ImplicitModificationWarning.
adata = adata[:, hvg_2000].copy()
def set_color(categories):
    """Order categorical ``adata.obs`` columns and register their colors.

    Works on the module-level ``adata`` and ``color`` objects. For every name
    in ``categories`` that is a column of ``adata.obs``, the column is cast to
    a categorical, its categories are ordered like the keys of
    ``color[name]`` (restricted to values present in the column), and the
    matching colors are stored in ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of column names; names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: If a column contains values that have no entry in
            ``color[name]``.
    """
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        palette = color[category]
        # reorder_categories() needs exactly the existing categories, so
        # categories without any cell are dropped first.
        values = adata.obs[category].astype('category').cat.remove_unused_categories()
        # Build the membership set once instead of listing every cell per key.
        observed = set(values.cat.categories)
        keys = [key for key in palette if key in observed]
        uncolored = observed.difference(keys)
        if uncolored:
            raise ValueError(
                f"No color defined for {category!r} values: "
                f"{sorted(map(str, uncolored))}"
            )
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([palette[key] for key in keys], dtype=object)


# Set colors
set_color(list(color.keys()))
/tmp/ipykernel_3028845/1707382715.py:7: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
# Split the cells into one AnnData copy per treatment group.
adata_sub = [
    adata[adata.obs['treatment'] == sample_group].copy()
    for sample_group in adata.obs['treatment'].unique()
]

# Run Scanorama
# The embedding is read back from each subset's .obsm['X_scanorama'] below.
scanorama.integrate_scanpy(adata_sub, dimred=200, knn=50, verbose=True)

# Concatenate scanorama output
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata
# Rows are concatenated group by group, so make sure they line up with
# adata.obs_names (reordering if necessary) before storing the embedding.
if len(obs_names) != adata.n_obs:
    raise ValueError('Scanorama output does not cover every cell of adata')
if not np.array_equal(obs_names, adata.obs_names):
    row_order = pd.Index(obs_names).get_indexer(adata.obs_names)
    if (row_order < 0).any():
        raise ValueError('Scanorama output is missing cells of adata')
    X_scanorama = X_scanorama[row_order]
adata.obsm["X_scanorama"] = X_scanorama
Found 2000 genes among all datasets Processing datasets (0, 1)
# Dimensional reduction and clustering
# The neighbour graph is built on the Scanorama embedding stored in .obsm.
sc.pp.neighbors(adata, use_rep='X_scanorama', n_neighbors=100)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    ncols=3,
    wspace=0.5,
)
# Save the integrated object as h5ad.
adata.write_h5ad('data/object/adata_sct_reg_hvg2000.h5ad')

# dirFacility is imported from ../scFacility/script, which is therefore put
# first on the module search path.
import sys
sys.path.insert(0, '../scFacility/script')
from dirFacility import adata2dir

# Export the same object as a directory tree.
adata2dir(
    adata,
    'data/object/adata_sct_reg_hvg2000/',
    assay="RNA",
    layers="counts",
    build_dir=True,
    overwrite=True,
)
Creating output directorydata/object/adata_sct_reg_hvg2000/ Writing meta data: data/object/adata_sct_reg_hvg2000/meta/meta.csv Writing assay data for the layers: Writing data for the reduction: ... X_scanorama ... X_umap